#!/usr/bin/env bash
############################################################
#AUTHOR         :mazeng
#DESCRIPTION    :User base features (用户基础特征)
#CREATE_DATE    :2022-04-25
#UPDATE_DATE    :2022-04-25 10:21:23
############################################################

source /etc/profile

# Usage: $0 <file_name>
# file_name: name of a file under ${project_dir}/upload to stage into OSS.
# ${1:?} aborts with a usage message when the argument is missing/empty,
# instead of silently running hdfs/spark with an empty path component.
file_name=${1:?usage: $0 <file_name>}

# Project root is assumed to be the parent of the current working directory.
project_dir=$(dirname "$PWD")

# Stage the input file into the OSS data-lake test area.
# Abort immediately on failure — there is no point submitting the Spark job
# if the input file never reached OSS.
hdfs dfs -put "${project_dir}/upload/${file_name}" \
    oss://emr-datalake.cn-beijing.oss-dls.aliyuncs.com/test/ \
    || { echo "ERROR: hdfs put of ${file_name} failed" >&2; exit 1; }

# Make `conda activate` usable from this non-interactive shell.
eval "$(conda shell.bash hook)"

# Log file for the spark-submit run (stdout+stderr appended below).
log_file="./songzhenxing_run_spark.log"

# BUGFIX: log_path was previously never assigned, so `[ -d ${log_path} ]`
# collapsed to `[ -d ]` (always true) and the mkdir guard was dead code.
# Derive it from log_file so the log directory is guaranteed to exist.
log_path=$(dirname "${log_file}")

# Resolve spark-submit up front and fail fast if it is not on PATH
# (`command -v` is the portable replacement for `which`).
spark_submit=$(command -v spark-submit) \
    || { echo "ERROR: spark-submit not found in PATH" >&2; exit 1; }

if ! [ -d "${log_path}" ]
then
    mkdir -p "${log_path}"
fi


# Activate the job's conda env, move to the working directory, then submit
# the PySpark job on YARN in client mode. The && chain stops at the first
# failing step, so a failed activate/cd never launches the job.
# Executors run from the archived env shipped via --archives; the driver
# uses the activated local env's python3.
conda activate app_message_push && \
    cd /home/hadoop/test/songzhenxing &&
    "${spark_submit}" --master yarn \
                    --queue important \
                    --driver-memory 10G \
                    --deploy-mode client \
                    --archives  oss://emr-datalake.cn-beijing.oss-dls.aliyuncs.com/user/ai/prod/app_message_push.zip#environment \
                    --conf spark.pyspark.python=./environment/app_message_push/bin/python3 \
                    --executor-cores 5 \
                    --num-executors 8 \
                    --executor-memory 10G \
                    --conf spark.executor.memoryOverhead=2G \
                    --conf spark.driver.maxResultSize=10G \
                    --conf spark.pyspark.driver.python=python3 \
                    --name songzhenxing_pyspark_submit \
                    csv_output_hive.py  "${file_name}" >> "${log_file}" 2>&1 &&
    conda deactivate